In [20]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys, os
%matplotlib inline
In [2]:
def AND(x1, x2):
    w1, w2, theta = 0.5, 0.5, 0.7
    tmp = x1*w1 + x2*w2
    if tmp <= theta:
        return 0
    elif tmp > theta:
        return 1
In [2]:
AND(1, 1)
Out[2]:
In [3]:
AND(1, 0)
Out[3]:
In [8]:
def AND2(x1, x2):
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])
    b = -0.7
    tmp = np.sum(w*x) + b
    if tmp <= 0:
        return 0
    else:
        return 1
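To see what the bias formulation is doing, the intermediate values can be printed step by step (a small sketch using the same weights and bias as AND2):
In [ ]:
x = np.array([0, 1])
w = np.array([0.5, 0.5])
b = -0.7
print(w*x)              # [0.  0.5]
print(np.sum(w*x))      # 0.5
print(np.sum(w*x) + b)  # about -0.2, so AND2(0, 1) returns 0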
In [9]:
AND2(1,1)
Out[9]:
In [10]:
AND2(0,1)
Out[10]:
In [11]:
AND2(0,0)
Out[11]:
In [4]:
def NAND(x1, x2):
    x = np.array([x1, x2])
    w = np.array([-0.5, -0.5])
    b = 0.7
    tmp = np.sum(w*x) + b
    if tmp < 0:
        return 0
    else:
        return 1
In [5]:
NAND(0,0)
Out[5]:
In [6]:
NAND(0,1)
Out[6]:
In [7]:
NAND(1,1)
Out[7]:
In [8]:
NAND(1,0)
Out[8]:
In [9]:
def OR(x1, x2):
    x = np.array([x1, x2])
    w = np.array([0.5, 0.5])
    b = -0.2
    tmp = np.sum(w*x) + b
    if tmp <= 0:
        return 0
    else:
        return 1
In [10]:
OR(0,0)
Out[10]:
In [11]:
OR(0,1)
Out[11]:
In [12]:
OR(1,0)
Out[12]:
In [13]:
OR(1,1)
Out[13]:
In [1]:
# The XOR problem cannot be solved by a single-layer perceptron.
# But once we drop the restriction to a single straight line it becomes possible -> use a multi-layer perceptron.
In [2]:
def XOR(x1, x2):
    s1 = NAND(x1, x2)
    s2 = OR(x1, x2)
    y = AND(s1, s2)
    return y
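As a quick check that combining NAND, OR, and AND really yields XOR, the four input pairs can be evaluated (a minimal sketch):
In [ ]:
for x1, x2 in [(0, 0), (1, 0), (0, 1), (1, 1)]:
    print((x1, x2), '->', XOR(x1, x2))  # expected: 0, 1, 1, 0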
In [22]:
def step_function(x):
    return np.array(x > 0, dtype=int)  # np.int was removed in newer NumPy; the built-in int works
In [26]:
np.arange?
In [29]:
x = np.arange(-5.0, 5.0, 0.1)
# previously wrote np.arange([-5.0, 5.0, 0.1]); the arguments are passed without the []
y = step_function(x)
plt.ylim(-0.1, 1.1)
plt.plot(x, y)
plt.show()
In [3]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
In [38]:
x = np.array([-5.0, 5.0, 0.1])
In [39]:
sigmoid(x)
Out[39]:
In [42]:
x = np.arange(-5, 5, 0.1)
In [43]:
y = sigmoid(x)
In [44]:
plt.plot(x, y)
plt.ylim(-0.1, 1.1)
plt.show()
In [45]:
def relu(x):
    return np.maximum(0, x)
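ReLU can be plotted the same way as the step function and sigmoid above (a small sketch):
In [ ]:
x = np.arange(-5.0, 5.0, 0.1)
y = relu(x)
plt.plot(x, y)
plt.ylim(-1.0, 5.5)
plt.show()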
In [47]:
a = np.array([1,2,3,4])
print(a)
In [48]:
np.ndim(a)
Out[48]:
In [49]:
# the shape of a 1-D array is also a tuple
a.shape
Out[49]:
In [50]:
a.shape[0]
Out[50]:
In [51]:
b = np.array([[1,2],[3,4], [5,6]])
print(b)
In [52]:
np.ndim(b)
Out[52]:
In [54]:
b.shape
Out[54]:
In [55]:
a = np.array([[1,2], [3,4]])
b = np.array([[5,6], [7,8]])
In [56]:
a.shape
Out[56]:
In [57]:
b.shape
Out[57]:
In [58]:
np.dot(a,b)
Out[58]:
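np.dot requires the inner dimensions to match: multiplying an (A, B) matrix by a (B, C) matrix gives an (A, C) result. A small sketch with non-square shapes:
In [ ]:
a = np.array([[1, 2, 3], [4, 5, 6]])    # shape (2, 3)
b = np.array([[1, 2], [3, 4], [5, 6]])  # shape (3, 2)
np.dot(a, b).shape                       # (2, 2)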
In [4]:
def identity_function(x):
    return x
In [5]:
# 3-layer neural network
def init_network():
    network = {}
    network['W1'] = np.array([[0.1, 0.3, 0.5], [0.2, 0.4, 0.6]])
    network['b1'] = np.array([0.1, 0.2, 0.3])
    network['W2'] = np.array([[0.1, 0.4], [0.2, 0.5], [0.3, 0.6]])
    network['b2'] = np.array([0.1, 0.2])
    network['W3'] = np.array([[0.1, 0.3], [0.2, 0.4]])
    network['b3'] = np.array([0.1, 0.2])
    return network

def forward(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = identity_function(a3)
    return y

network = init_network()
x = np.array([1.0, 0.5])
y = forward(network, x)
print(y)
In [62]:
# The function above used the identity function at the output layer. This time let's try the softmax function!!
In [63]:
a = np.array([0.3, 2.9, 4.0])
In [64]:
exp_a = np.exp(a)
In [65]:
print(exp_a)
In [66]:
sum_exp_a = np.sum(exp_a)
In [67]:
print(sum_exp_a)
In [69]:
y = exp_a / sum_exp_a
In [70]:
print(y)
In [6]:
def softmax(a):
    exp_a = np.exp(a)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y
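The naive softmax above can overflow, because np.exp of a large input explodes. A common remedy (shown here as a sketch; softmax_stable is not the function used in the rest of this notebook) is to subtract the maximum of the input first, which leaves the result mathematically unchanged:
In [ ]:
def softmax_stable(a):
    c = np.max(a)
    exp_a = np.exp(a - c)  # shifting by the max prevents overflow
    return exp_a / np.sum(exp_a)

softmax_stable(np.array([1010, 1000, 990]))  # the naive version would return nan here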
In [7]:
import sys, os
sys.path.append(os.pardir)
In [9]:
from dataset.mnist import load_mnist
In [11]:
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=True, normalize=False)
In [12]:
print(x_train.shape)
In [13]:
print(t_train.shape)
In [14]:
print(x_test.shape)
In [15]:
print(t_test.shape)
In [17]:
from PIL import Image
In [18]:
def img_show(img):
    pil_img = Image.fromarray(np.uint8(img))
    pil_img.show()
In [19]:
img = x_train[0]
print(img)
In [20]:
label = t_train[0]
print(label)
In [21]:
print(img.shape)
In [22]:
img = img.reshape(28, 28)  # convert the flat 784 vector back to 28 x 28
In [23]:
print(img.shape)
In [24]:
img_show(img)
In [29]:
import pickle
In [30]:
def get_data():
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, flatten=True, one_hot_label=False)
    return x_test, t_test

def init_network():
    with open('./ch03/sample_weight.pkl', 'rb') as f:
        network = pickle.load(f)
    return network

def predict(network, x):
    W1, W2, W3 = network['W1'], network['W2'], network['W3']
    b1, b2, b3 = network['b1'], network['b2'], network['b3']
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    z2 = sigmoid(a2)
    a3 = np.dot(z2, W3) + b3
    y = softmax(a3)
    return y
In [34]:
x, t = get_data()
network = init_network()
accuracy_cnt = 0
for i in range(len(x)):
    y = predict(network, x[i])
    p = np.argmax(y)
    if p == t[i]:
        # print(p)
        accuracy_cnt += 1
print("Accuracy:" + str(float(accuracy_cnt)/len(x)))
In [35]:
# Normalization: transforming the data into a specific range
# Preprocessing: applying a specific transformation to the input data
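For example, normalize=True in load_mnist rescales the 0-255 pixel values into the 0.0-1.0 range; doing the same thing by hand would look roughly like this (a sketch, using the x_train loaded above with normalize=False):
In [ ]:
raw = x_train[0].astype(np.float32)  # pixel values are 0-255 when normalize=False
scaled = raw / 255.0                 # normalized into the 0.0-1.0 range
print(raw.max(), scaled.max())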
In [36]:
# Batch: input data bundled into one group (x[0] and y[0] hold the 0th image and its inference result)
In [37]:
x, _ = get_data()
network = init_network()
W1, W2, W3 = network['W1'], network['W2'], network['W3']
In [38]:
x.shape
Out[38]:
In [39]:
x[0].shape
Out[39]:
In [40]:
W1.shape
Out[40]:
In [41]:
W2.shape
Out[41]:
In [42]:
W3.shape
Out[42]:
In [47]:
x, t = get_data()
network = init_network()
batch_size = 100
accuracy_cnt = 0
for i in range(0, len(x), batch_size):
    x_batch = x[i:i+batch_size]
    y_batch = predict(network, x_batch)
    p = np.argmax(y_batch, axis=1)
    accuracy_cnt += np.sum(p == t[i:i+batch_size])
print("Accuracy:" + str(float(accuracy_cnt)/len(x)))
In [51]:
x[1]
Out[51]:
In [52]:
x[1:2]
Out[52]:
In [53]:
x
Out[53]:
In [54]:
p
Out[54]:
In [3]:
def mean_squared_error(y, t):
    return 0.5 * np.sum((y-t)**2)
In [4]:
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# the correct label is 2
In [10]:
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
# the network estimates that class 2 is the most likely
In [9]:
np.array(t)
Out[9]:
In [8]:
mean_squared_error(np.array(y), np.array(t))
Out[8]:
In [11]:
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
# the network estimates that class 7 is the most likely
In [12]:
mean_squared_error(np.array(y), np.array(t))
Out[12]:
The first estimate has the smaller error, so it can be judged to be closer to the correct answer.
With cross-entropy error, the output at the correct label effectively determines the whole value.
In [14]:
def cross_entropy_error(y, t):
    delta = 1e-7
    return -np.sum(t*np.log(y + delta))
    # delta is added because np.log(0) is negative infinity, which would break the calculation
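The effect of delta can be checked directly: without it, a zero probability at the correct label makes the loss blow up to infinity (a quick sketch):
In [ ]:
print(np.log(0))         # -inf (NumPy also prints a RuntimeWarning)
print(np.log(0 + 1e-7))  # about -16.1: a large but finite penalty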
In [15]:
t = [0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
# the correct label is 2
In [16]:
y = [0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0]
# the network estimates that class 2 is the most likely
In [17]:
cross_entropy_error(np.array(y), np.array(t))
Out[17]:
In [18]:
y = [0.1, 0.05, 0.1, 0.0, 0.05, 0.1, 0.0, 0.6, 0.0, 0.0]
# the network estimates that class 7 is the most likely
In [19]:
cross_entropy_error(np.array(y), np.array(t))
Out[19]:
In [21]:
sys.path.append(os.pardir)
In [22]:
from dataset.mnist import load_mnist
In [23]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
In [24]:
print(x_train.shape)
In [25]:
print(t_train.shape)
In [26]:
train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]
In [27]:
def cross_entropy_error(y, t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    return -np.sum(t*np.log(y)) / batch_size
In [28]:
def cross_entropy_error(y, t):
    # version for when t is given as integer class labels (not one-hot)
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size
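The y[np.arange(batch_size), t] indexing picks out, for every row of the batch, the network's output at the correct-label column. A minimal sketch of that extraction (y_demo and t_demo are just illustrative names):
In [ ]:
y_demo = np.array([[0.1, 0.6, 0.3],
                   [0.8, 0.1, 0.1]])
t_demo = np.array([1, 0])            # integer labels, not one-hot
y_demo[np.arange(2), t_demo]         # array([0.6, 0.8])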
In [29]:
# Central (centered) difference: compute the difference around x, using the points just before and after x
In [31]:
def numerical_diff(f, x):
    h = 1e-4
    return (f(x+h) - f(x-h)) / (2*h)
In [32]:
def function_1(x):
    return 0.01*x**2 + 0.1*x
In [33]:
x = np.arange(0.0, 20.0, 0.1)
In [34]:
y = function_1(x)
In [35]:
plt.xlabel('x')
plt.ylabel('f(x)')
plt.plot(x, y)
plt.show()
In [36]:
numerical_diff(function_1, 5)
Out[36]:
In [37]:
numerical_diff(function_1, 10)
Out[37]:
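The analytic derivative of function_1 is 0.02x + 0.1, i.e. 0.2 at x = 5 and 0.3 at x = 10, so the central-difference values above should match to many decimal places (a quick check; analytic_diff_1 is just a helper added here):
In [ ]:
def analytic_diff_1(x):
    return 0.02*x + 0.1  # exact derivative of 0.01*x**2 + 0.1*x

print(analytic_diff_1(5), analytic_diff_1(10))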
In [38]:
def function_2(x):
    return x[0]**2 + x[1]**2
In [41]:
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)  # create an array with the same shape as x
    for idx in range(x.size):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)
        x[idx] = tmp_val - h
        fxh2 = f(x)
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tmp_val  # restore the original value
    return grad
In [42]:
numerical_gradient(function_2, np.array([3.0, 4.0]))
Out[42]:
In [45]:
numerical_gradient(function_2, np.array([0.0, 2.0]))
Out[45]:
In [46]:
numerical_gradient(function_2, np.array([3.0, 0.0]))
Out[46]:
In [48]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x
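The learning rate matters a great deal: too large and the updates diverge, too small and the search barely moves. A sketch using the same function_2 as above (note .copy(): gradient_descent updates init_x in place):
In [ ]:
init_x = np.array([-3.0, 4.0])
print(gradient_descent(function_2, init_x=init_x.copy(), lr=10.0, step_num=100))    # too large: diverges
print(gradient_descent(function_2, init_x=init_x.copy(), lr=1e-10, step_num=100))   # too small: barely moves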
In [49]:
## Problem: use gradient descent to find the minimum of f(x0, x1) = x0^2 + x1^2
In [50]:
def function_2(x):
    return x[0]**2 + x[1]**2
In [51]:
init_x = np.array([-3.0, 4.0])
In [52]:
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)
Out[52]:
In [56]:
from common.functions import *
from common.gradient import numerical_gradient
In [57]:
# 2-layer neural network
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        return y

    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads
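A quick shape check of the freshly initialized parameters (a minimal sketch; the sizes just mirror the MNIST setup used below):
In [ ]:
net = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
print(net.params['W1'].shape)  # (784, 50)
print(net.params['b1'].shape)  # (50,)
print(net.params['W2'].shape)  # (50, 10)
print(net.params['b2'].shape)  # (10,)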
In [60]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # so that files in the parent directory can be imported
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.two_layer_net import TwoLayerNet
In [61]:
# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# hyperparameters
iters_num = 10000  # set the number of iterations appropriately
train_size = x_train.shape[0]
batch_size = 100   # mini-batch size
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# number of iterations per epoch
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # draw a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # compute the gradient
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)

    # update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # record the learning progress
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # compute accuracy once per epoch
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# draw the graph
markers = {'train': 'o', 'test': 's'}
x = np.arange(len(train_acc_list))
plt.plot(x, train_acc_list, label='train acc')
plt.plot(x, test_acc_list, label='test acc', linestyle='--')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
In [ ]: